perm filename DIPDAT.SAI[5,ALS] blob sn#001155 filedate 1972-01-27 generic text, type T, neo UTF8
00010	BEGIN "DIPDAT" COMMENT 21-JAN-72;
00020	COMMENT Reads disk file "PHON" containing words and phonetic 
00030	  transcriptions.  Creates a disk file containing counts of the number
00040	  of times each diphone is used in the reviewed list.
00050	;
00060	
00070	REQUIRE "MACROS[SYS,JKS]" SOURCE_FILE;
00080	REQUIRE "COMSUB.HDR[SYS,JKS]" SOURCE_FILE;
00090	
00100	INTEGER IIII,JJJJ,KKKK,LL,QQ,XXXX,YYYY;
00110	INTEGER ARRAY AAAA,BBBB[0:700];
00120	INTEGER BRK,BRK1,EOF,I,L,COUNT,CHOICE,WORDNO;
00130	STRING TSTR,DPH1,DPH2,PH1,PH2,LIN,WORD,LIN1;
00140	STRING LIST;
00150	
00160	SETBR;
00170	OPEN(DSK,"DSK",1,2,0,120,BRK,EOF);
00180	OPEN(DSKO,"DSK",1,0,2,120,BRK,EOF);
00190	OPEN(TTY,"TTY",1,1,1,120,BRK,EOF);
00200	
00210	LOOKIN(DSK,TSTR←"PHON");
00220	ENTEROUT(DSKO,TSTR←"DIPDAT.LST");
00230	OUT(DSKO,DATIME("ALL")&TB&"Unweighted Diphone count"&CR&LF&LF);
00240	
00250	EOF ← FALSE; KKKK←COUNT ← WORDNO ← 0;
00260	
00270	WHILE ¬EOF DO
00280	BEGIN "READ"
00290	LIN ← INPUT(DSK,1);
00300	TSTR ← SCAN(LIN,6,BRK);
00310	IF TSTR≠NULL THEN BEGIN WORD ← TSTR; WORDNO ← WORDNO + 1 END;
00320	LIN1←WORD&TB&LIN;
00330	IF LIN[1 FOR 1]=TB THEN LIN←LIN[2 TO ∞];
00340	TSTR←SCAN(LIN,6,BRK);
00350	TSTR←SCAN(LIN,6,BRK); 
00360	TSTR←SCAN(LIN,6,BRK);
00370	LIN←LIN&CR;
00380	BRK ← 0; PH1←SCAN(LIN,10,BRK); PH2 ← " ";
00390	WHILE (LENGTH(LIN)>0) AND ¬EQU(PH2,"XX") DO
00400	 BEGIN
00410	  IF BRK≠CR THEN
00420	   BEGIN
00430	    IF BRK=TB THEN PH1←SCAN(LIN,10,BRK);
00440	    PH2 ← SCAN(LIN,10,BRK);
00450	   END
00460	 ELSE PH2 ← "XX";
00470	
00480	XXXX←CVASC(PH1&"-"&PH2);
00490	FOR I←0 STEP 1 UNTIL 699 DO
00500	 BEGIN
00510	  IF AAAA[I]=0 THEN DONE;
00520	  IF XXXX=BBBB[I] THEN
00530	   BEGIN
00540	    AAAA[I]←AAAA[I]+1; KKKK←KKKK+1;
00550	    DONE
00560	   END;
00570	 END;
00580	IF AAAA[I]=0 THEN
00590	 BEGIN
00600	  AAAA[I]←1; BBBB[I]←XXXX; COUNT←COUNT+1; KKKK←KKKK+1;
00610	 END;
00620	  PH1←PH2;
00630	 END;
00640	IF INCHRS=" " THEN
00650	BEGIN OUT(TTY," RUN ABORTED AT WORD # "&CVS(WORDNO)&" WITH "&CVS(COUNT)&" DIPHONES."); DONE END; C SPACE STOPS RUN;
00660	END "READ";
00670	OUT(DSKO,"Usage counts in 10% groups for the "&CVS(COUNT)&" diphones in "
00680	&CVS(WORDNO)&" words used by 2 or more speakers in Jones and Wexman"&CRLF&LF);
00690	KKKK←KKKK%10+1;
00700	
00710	COMMENT We must now sort file in decending order of A[I];
00720	FOR JJJJ←698 STEP -1 UNTIL 0 DO
00730	  FOR I←JJJJ STEP 1 UNTIL 699 DO
00740	     BEGIN
00750	      IF AAAA[I]>AAAA[I+1] THEN DONE;
00760	      IF AAAA[I]=AAAA[I+1] THEN IF BBBB[I]≤BBBB[I+1] THEN DONE;
00770	      XXXX←AAAA[I]; AAAA[I]←AAAA[I+1]; AAAA[I+1]←XXXX;
00780	      XXXX←BBBB[I]; BBBB[I]←BBBB[I+1]; BBBB[I+1]←XXXX;
00790	     END;
00800	
00810	COMMENT Now prepare listing;
00820	I←0; COUNT←2; SETFORMAT(5,0); LL←AAAA[I]; QQ←1;
00830	
00840	OUT(DSKO,CVS(AAAA[I])&TB&CVSTR(BBBB[I])&TB);
00850	FOR I←1 STEP 1 UNTIL 699 DO
00860	 BEGIN
00870	  IF AAAA[I]=0 THEN DONE;
00880	  IF AAAA[I]≠AAAA[I-1] THEN 
00890	   BEGIN
00900	    IF COUNT MOD 2 =1 THEN
00910	     BEGIN
00920	      OUT(DSKO,TB); COUNT←COUNT+1;
00930	     END;
00940	    IF COUNT≥3 THEN BEGIN
00950	     FOR L←1 STEP 1 UNTIL 14 DO
00960	      IF AAAA[I]≠AAAA[I+L] THEN DONE;
00970	     IF COUNT+ L≥14 THEN
00980	     BEGIN
00990	      OUT(DSKO,CRLF);
01000	      COUNT←0; JJJJ←AAAA[I];
01010	     END;
01020	    END;
01030	    IF COUNT≥13 THEN
01040	     BEGIN
01050	      OUT(DSKO,CRLF);
01060	      COUNT←0; JJJJ←AAAA[I];
01070	     END;
01080	    OUT(DSKO,CVS(AAAA[I])&TB);
01090	    COUNT←COUNT+1;
01100	   END
01110	  ELSE
01120	   BEGIN
01130	    IF COUNT≥14 THEN
01140	     BEGIN
01150	      OUT(DSKO,CRLF);
01160	      IF AAAA[I]≠JJJJ THEN 
01170	       BEGIN
01180	        JJJJ←AAAA[I];
01190	        OUT(DSKO,CVS(AAAA[I]));
01200	       END;
01210	      OUT(DSKO,TB);
01220	      COUNT←1;
01230	     END;
01240	   END;
01250	  OUT(DSKO,CVSTR(BBBB[I])&TB);
01260	  COUNT←COUNT+1;
01270	  LL←LL+AAAA[I]; QQ←QQ+1;
01280	  IF LL≥KKKK THEN BEGIN
01290	    LL←LL-KKKK; OUT(DSKO,CRLF&"****"&TB&CVS(QQ)&" Diphones"); COUNT←14; END;
01300	 END;
01310	CLOSE(DSK);
01320	
01330	OUT(DSKO,CR&FF&"Diphones found more than once in "&cvs(wordno)
01340	    &" words used by 2 or more speakers in Jones and Wexman"&CRLF&LF);
01350	I←0; WHILE AAAA[I]>1 DO I←I+1; IIII←I;
01360	
01370	SETFORMAT(1,0);
01380	FOR JJJJ←IIII-2 STEP -1 UNTIL 0 DO
01390	 FOR I←JJJJ STEP 1 UNTIL IIII-2 DO
01400	  IF BBBB[I]>BBBB[I+1] THEN BEGIN
01410	   XXXX←BBBB[I]; BBBB[I]←BBBB[I+1]; BBBB[I+1]←XXXX; END
01420	  ELSE DONE;
01430	
01440	COUNT←0;
01450	FOR I←0 STEP 1 UNTIL IIII-1 DO
01460	 BEGIN
01470	  OUT(DSKO,CVSTR(BBBB[I])&TB);
01480	  COUNT←COUNT+1;
01490	  IF COUNT≥14 THEN BEGIN OUT(DSKO,CRLF); COUNT←0; END;
01500	 END;
01510	
01520	OUT(DSKO,CR&FF&"Diphones found in only one word of "&CVS(WORDNO)
01530	    &" words used by 2 or more speakers in Jones and Wexman"&CRLF&LF);
01540	
01550	OPEN(DSK,"DSK",1,2,0,120,BRK,EOF);
01560	LOOKIN(DSK,TSTR←"PHON");
01570	EOF ← FALSE; COUNT ← WORDNO ← 0;
01580	
01590	WHILE ¬EOF DO
01600	BEGIN "REVIEW"
01610	LIN ← INPUT(DSK,1);
01620	TSTR ← SCAN(LIN,6,BRK);
01630	IF TSTR≠NULL THEN BEGIN WORD ← TSTR; WORDNO ← WORDNO + 1 END;
01640	LIN1←WORD&TB&LIN;
01650	IF LIN[1 FOR 1]=TB THEN LIN←LIN[2 TO ∞];
01660	TSTR←SCAN(LIN,6,BRK);
01670	TSTR←SCAN(LIN,6,BRK);
01680	TSTR←SCAN(LIN,6,BRK);
01690	LIN←LIN&CR;
01700	BRK ← 0; PH1←SCAN(LIN,10,BRK); PH2 ← " ";
01710	WHILE (LENGTH(LIN)>0) AND ¬EQU(PH2,"XX") DO
01720	 BEGIN
01730	  IF BRK≠CR THEN
01740	   BEGIN
01750	    IF BRK=TB THEN PH1←SCAN(LIN,10,BRK);
01760	    PH2 ← SCAN(LIN,10,BRK);
01770	   END
01780	 ELSE PH2 ← "XX";
01790	
01800	XXXX←CVASC(PH1&"-"&PH2);
01810	FOR I←IIII STEP 1 UNTIL 699 DO
01820	 BEGIN
01830	  IF AAAA[I]=0 THEN DONE;
01840	  IF XXXX=BBBB[I] THEN
01850	   BEGIN
01860	    OUT(DSKO,CVSTR(BBBB[I])&TB&WORD&CRLF);
01870	    DONE
01880	   END;
01890	 END;
01900	PH1←PH2;
01910	END;
01920	END "REVIEW";
01930	
01940	XXXX←CVASC("ZZZZZ");
01950	LIST← "A AA AE AR AW B CH D DH E EE F G H I J K L M N NG O OO P R S SH T TH U V W Y Z ZH"&CR;
01960	FOR I←699 STEP -1 UNTIL 0 DO
01970	 IF AAAA[I]=0 THEN BBBB[I]←XXXX ELSE DONE;
01980	KKKK←I;
01990	
02000	FOR JJJJ←KKKK-1 STEP -1 UNTIL 0 DO
02010	 FOR I←JJJJ STEP 1 UNTIL KKKK-1 DO
02020	  IF BBBB[I]>BBBB[I+1] THEN BEGIN
02030	   XXXX←BBBB[I]; BBBB[I]←BBBB[I+1]; BBBB[I+1]←XXXX; END
02040	     ELSE DONE;
02050	
02060	OUT(DSKO,CR&FF&"All diphones found in "
02070	&CVS(WORDNO)&" words used by 2 or more speakers in Jones and Wexman"&CRLF&LF);
02080	COUNT←0;
02090	FOR I←0 STEP 1 UNTIL 699 DO
02100	 BEGIN
02110	  IF AAAA[I]=0 THEN DONE;
02120	  OUT(DSKO,CVSTR(BBBB[I])&TB);
02130	  COUNT←COUNT+1;
02140	   IF COUNT≥14 THEN BEGIN OUT(DSKO,CRLF); COUNT←0; END;
02150	 END;
02160	CLOSE(DSK);
02170	
02180	OUT(DSKO,CR&FF&"Diphones not found in "
02190	&CVS(WORDNO)&" words used by 2 or more speakers in Jones and Wexman"&CRLF&LF);
02200	COUNT←0; I←0; LIN←LIST&CR; BRK←0;
02210	
02220	WHILE (LENGTH(LIN)>0) DO
02230	 BEGIN
02240	  IF BRK=CR THEN DONE;
02250	  PH1←SCAN(LIN,10,BRK);
02260	  LIN1←LIST&CR;
02270	  BRK1←0;
02280	  WHILE (LENGTH(LIN1)>0) DO
02290	   BEGIN
02300	    IF BRK1=CR THEN DONE;
02310	    PH2←SCAN(LIN1,10,BRK1);
02320	    XXXX←CVASC(PH1&"-"&PH2);
02330	    IF XXXX>BBBB[I] THEN
02340	     BEGIN
02350	      OUT(DSKO,CRLF&CVSTR(BBBB[I])&" encountered as unidentified"&CRLF);
02360	      OUT(TTY,CRLF&CVSTR(BBBB[I])&" encountered as unidentified"&CRLF);
02370	      I←I+1;
02380	     END;
02390	    IF XXXX<BBBB[I] THEN
02400	      BEGIN
02410	       OUT(DSKO,CVSTR(XXXX)&TB);
02420	       COUNT←COUNT+1;
02430	       IF COUNT≥14 THEN BEGIN OUT(DSKO,CRLF); COUNT←0; END;
02440	      END;
02450	     IF XXXX=BBBB[I] THEN I←I+1;
02460	   END;
02470	 END;
02480	
02490	
02500	
02510	CLOSE(DSKO);
02520	CLOSE(DSK);
02530	OUT(TTY,CRLF&"OUTPUT FILE: DIPDAT.LST");
02540	
02550	END "DIPDAT";